
/*******************************************************************************/
/*  
	This script cleans the O*NET data (Work Activities and Work Context files).
	Download Database 24.2 from https://www.onetcenter.org/db_releases.html
    --
	Alexander Weinberg
	February 16, 2021
*/
/*******************************************************************************/


//________________________________________________________________
// Work Activities: read the raw file, keep only the importance-scale
// rows, and stash the result in a tempfile for the append step.
import delimited "../Raw Data/ONET/Work_Activities.txt", clear

drop if scaleid != "IM"									// importance scale only
keep onetsoccode elementid elementname datavalue n

// Prefix marks these elements as coming from the Work Activities file
quietly replace elementname = "wa_" + elementname

tempfile WorkActivity
save `WorkActivity'

//________________________________________________________________
// Work Context: read the raw file, keep only rows on the "CX" scale,
// and stash the result in a tempfile for the append step.
import delimited "../Raw Data/ONET/Work_Context.txt", clear

quietly drop if scaleid != "CX"
keep onetsoccode elementid elementname datavalue n

// Prefix marks these elements as coming from the Work Context file
quietly replace elementname = "wc_" + elementname

tempfile WorkContext
save `WorkContext'

//________________________________________________________________
// Append
// n (number of survey respondents) is read as a string only when the raw
// file contains non-numeric entries such as "n/a"; otherwise import
// delimited already stores it as numeric. Convert it to numeric in each
// piece BEFORE stacking, so that neither the append (string vs. numeric
// mismatch) nor the string replace (type mismatch on a numeric n) can fail.
use `WorkContext', clear
capture confirm string variable n
if _rc == 0 {
	qui replace n = "." if n == "n/a"
	destring n, replace
}
save `WorkContext', replace

use `WorkActivity', clear
capture confirm string variable n
if _rc == 0 {
	qui replace n = "." if n == "n/a"
	destring n, replace
}
append using `WorkContext'

// Stop here with a clear error if some other non-numeric code kept n a string
confirm numeric variable n

//________________________________________________________________
// Mean n within onetsoccode, n = survey respondents
// NOTE: an onetsoccode whose n is missing on every row gets a missing N;
// such rows are excluded by the [aw=N] collapse further down.
bys onetsoccode: egen N = mean(n)
replace N = round(N)

//________________________________________________________________
// Turn elementname into something usable as a variable-name suffix:
// spaces, slashes and hyphens become underscores, commas are removed.
quietly replace elementname = ///
	subinstr(subinstr(subinstr(subinstr(elementname, " ", "_", .), "/", "_", .), ",", "", .), "-", "_", .)

// Drop the filler word "with" (must run after the separators became "_")
quietly replace elementname = subinstr(elementname, "_with_", "_", .)

//________________________________________________________________
// Var names must be short: keep the first 20 characters of the name and
// append the element id; the dots are then stripped from the whole string.
quietly replace elementname = ///
	subinstr(substr(elementname, 1, 20) + "_" + elementid, ".", "", .)

// Collapse any double underscore produced by the steps above
quietly replace elementname = subinstr(elementname, "__", "_", .)

//________________________________________________________________
// Long -> wide: one row per onetsoccode, one "_<elementname>" column
// per element holding its datavalue.
drop elementid n
rename datavalue _
reshape wide _, i(onetsoccode) j(elementname) string

//________________________________________________________________
// Attach the crosswalk by onetsoccode; only rows found in both the
// O*NET data and the crosswalk file are kept.
merge m:1 onetsoccode using "../Raw Data/Crosswalks/onet_oessoc19_xwalk.dta", ///
	keep(3) nogenerate

//________________________________________________________________
// One row per oes2019 code: N-weighted mean of every _wa_* / _wc_* score,
// with oestitle taken from the first row of each group.
collapse (mean) _wa_* _wc_* (first) oestitle [aweight = N], by(oes2019)

order oes2019
sort oes2019
save "../Data/ONET/onet_clean", replace

//end
